# Load the red- and white-wine quality tables. Both files are read as
# semicolon-separated text with a header row, then converted to tibbles.
df <- as_tibble(
  read.table("winequality-red.csv", header = TRUE, sep = ";")
)
df_white <- as_tibble(
  read.table("winequality-white.csv", header = TRUE, sep = ";")
)

Rudimentary LM on whole dataset

# Baseline model: regress quality on every other column of df.
lm_basic <- lm(quality ~ ., data = df)

# Coefficient table, residual summary and fit statistics.
summary(lm_basic)
## 
## Call:
## lm(formula = quality ~ ., data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.68911 -0.36652 -0.04699  0.45202  2.02498 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           2.197e+01  2.119e+01   1.036   0.3002    
## fixed.acidity         2.499e-02  2.595e-02   0.963   0.3357    
## volatile.acidity     -1.084e+00  1.211e-01  -8.948  < 2e-16 ***
## citric.acid          -1.826e-01  1.472e-01  -1.240   0.2150    
## residual.sugar        1.633e-02  1.500e-02   1.089   0.2765    
## chlorides            -1.874e+00  4.193e-01  -4.470 8.37e-06 ***
## free.sulfur.dioxide   4.361e-03  2.171e-03   2.009   0.0447 *  
## total.sulfur.dioxide -3.265e-03  7.287e-04  -4.480 8.00e-06 ***
## density              -1.788e+01  2.163e+01  -0.827   0.4086    
## pH                   -4.137e-01  1.916e-01  -2.159   0.0310 *  
## sulphates             9.163e-01  1.143e-01   8.014 2.13e-15 ***
## alcohol               2.762e-01  2.648e-02  10.429  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.648 on 1587 degrees of freedom
## Multiple R-squared:  0.3606, Adjusted R-squared:  0.3561 
## F-statistic: 81.35 on 11 and 1587 DF,  p-value: < 2.2e-16

Volatile acidity, chlorides, total sulfur dioxide, sulphates and alcohol are all highly statistically significant (p < 0.001), although the adjusted \(R^2\) of about 0.36 is still quite low.

Ridge regression

# Fix the RNG state before resampling: the cross-validation folds are drawn
# at random, so without a seed the reported RMSE / Rsquared / MAE and the
# selected lambda differ from run to run.
set.seed(2024)

# 5-fold cross-validation settings; this object is also passed to the
# stepwise train() call further down the script.
train_control <- trainControl(method = "cv", number = 5)

# Ridge regression of quality on all other columns of df. No tuning grid is
# supplied, so train() chooses the candidate lambda values itself.
model_ridge <- train(
  quality ~ .,
  data = df,
  method = "ridge",
  trControl = train_control
)

# Print the resampled performance for each candidate lambda.
model_ridge
## Ridge Regression 
## 
## 1599 samples
##   11 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (5 fold) 
## Summary of sample sizes: 1278, 1280, 1279, 1279, 1280 
## Resampling results across tuning parameters:
## 
##   lambda  RMSE       Rsquared   MAE      
##   0e+00   0.6514278  0.3491897  0.5041809
##   1e-04   0.6514270  0.3491918  0.5041797
##   1e-01   0.6516483  0.3494942  0.5036084
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was lambda = 1e-04.
# FIXME: something doesn't seem to be working here.
# NOTE(review): the original note does not say what is failing -- confirm
# whether it refers to this stepwise fit or to the ridge fit.
#
# Stepwise (AIC-based) model of quality on all other columns of df, resampled
# with the shared train_control settings.
# NOTE(review): the step-by-step AIC trace is printed for every fit; passing
# `trace = FALSE` through train() should silence it -- confirm against the
# caret / MASS::stepAIC documentation before relying on it.
model_stepwise <- train(
  quality ~ .,
  data = df,
  method = "glmStepAIC",
  trControl = train_control
)
## Start:  AIC=2528.1
## .outcome ~ fixed.acidity + volatile.acidity + citric.acid + residual.sugar + 
##     chlorides + free.sulfur.dioxide + total.sulfur.dioxide + 
##     density + pH + sulphates + alcohol
## 
##                        Df Deviance    AIC
## - density               1   529.86 2526.6
## - fixed.acidity         1   530.04 2527.0
## - residual.sugar        1   530.05 2527.0
## - citric.acid           1   530.12 2527.2
## <none>                      529.67 2528.1
## - pH                    1   530.72 2528.6
## - free.sulfur.dioxide   1   530.76 2528.7
## - chlorides             1   534.06 2536.7
## - total.sulfur.dioxide  1   535.48 2540.1
## - sulphates             1   545.02 2562.6
## - volatile.acidity      1   561.57 2600.9
## - alcohol               1   565.36 2609.5
## 
## Step:  AIC=2526.55
## .outcome ~ fixed.acidity + volatile.acidity + citric.acid + residual.sugar + 
##     chlorides + free.sulfur.dioxide + total.sulfur.dioxide + 
##     pH + sulphates + alcohol
## 
##                        Df Deviance    AIC
## - fixed.acidity         1   530.04 2525.0
## - residual.sugar        1   530.05 2525.0
## - citric.acid           1   530.32 2525.7
## <none>                      529.86 2526.6
## - free.sulfur.dioxide   1   531.03 2527.4
## - pH                    1   532.24 2530.3
## - chlorides             1   534.41 2535.5
## - total.sulfur.dioxide  1   535.82 2538.9
## - sulphates             1   545.44 2561.6
## - volatile.acidity      1   562.95 2602.0
## - alcohol               1   624.15 2734.0
## 
## Step:  AIC=2525
## .outcome ~ volatile.acidity + citric.acid + residual.sugar + 
##     chlorides + free.sulfur.dioxide + total.sulfur.dioxide + 
##     pH + sulphates + alcohol
## 
##                        Df Deviance    AIC
## - residual.sugar        1   530.27 2523.6
## - citric.acid           1   530.32 2523.7
## <none>                      530.04 2525.0
## - free.sulfur.dioxide   1   531.30 2526.0
## - pH                    1   534.59 2533.9
## - chlorides             1   535.38 2535.8
## - total.sulfur.dioxide  1   537.15 2540.0
## - sulphates             1   545.83 2560.5
## - volatile.acidity      1   564.01 2602.5
## - alcohol               1   624.38 2732.5
## 
## Step:  AIC=2523.55
## .outcome ~ volatile.acidity + citric.acid + chlorides + free.sulfur.dioxide + 
##     total.sulfur.dioxide + pH + sulphates + alcohol
## 
##                        Df Deviance    AIC
## - citric.acid           1   530.49 2522.1
## <none>                      530.27 2523.6
## - free.sulfur.dioxide   1   531.64 2524.9
## - pH                    1   534.91 2532.7
## - chlorides             1   535.52 2534.2
## - total.sulfur.dioxide  1   537.19 2538.1
## - sulphates             1   545.87 2558.6
## - volatile.acidity      1   564.02 2600.5
## - alcohol               1   626.65 2735.1
## 
## Step:  AIC=2522.09
## .outcome ~ volatile.acidity + chlorides + free.sulfur.dioxide + 
##     total.sulfur.dioxide + pH + sulphates + alcohol
## 
##                        Df Deviance    AIC
## <none>                      530.49 2522.1
## - free.sulfur.dioxide   1   532.01 2523.7
## - pH                    1   535.26 2531.5
## - chlorides             1   536.20 2533.8
## - total.sulfur.dioxide  1   537.94 2537.9
## - sulphates             1   545.94 2556.8
## - volatile.acidity      1   571.78 2616.0
## - alcohol               1   628.70 2737.3
## Start:  AIC=2540.79
## .outcome ~ fixed.acidity + volatile.acidity + citric.acid + residual.sugar + 
##     chlorides + free.sulfur.dioxide + total.sulfur.dioxide + 
##     density + pH + sulphates + alcohol
## 
##                        Df Deviance    AIC
## - density               1   534.98 2539.8
## - free.sulfur.dioxide   1   535.23 2540.4
## <none>                      534.55 2540.8
## - fixed.acidity         1   535.45 2540.9
## - residual.sugar        1   535.79 2541.8
## - citric.acid           1   536.05 2542.4
## - pH                    1   537.03 2544.7
## - total.sulfur.dioxide  1   541.02 2554.2
## - chlorides             1   542.79 2558.4
## - sulphates             1   559.56 2597.3
## - volatile.acidity      1   560.58 2599.7
## - alcohol               1   574.32 2630.6
## 
## Step:  AIC=2539.84
## .outcome ~ fixed.acidity + volatile.acidity + citric.acid + residual.sugar + 
##     chlorides + free.sulfur.dioxide + total.sulfur.dioxide + 
##     pH + sulphates + alcohol
## 
##                        Df Deviance    AIC
## - fixed.acidity         1   535.47 2539.0
## - free.sulfur.dioxide   1   535.77 2539.7
## - residual.sugar        1   535.79 2539.8
## <none>                      534.98 2539.8
## - citric.acid           1   536.50 2541.4
## - pH                    1   540.55 2551.1
## - total.sulfur.dioxide  1   541.79 2554.0
## - chlorides             1   543.63 2558.4
## - sulphates             1   559.89 2596.1
## - volatile.acidity      1   562.39 2601.8
## - alcohol               1   641.22 2769.7
## 
## Step:  AIC=2538.99
## .outcome ~ volatile.acidity + citric.acid + residual.sugar + 
##     chlorides + free.sulfur.dioxide + total.sulfur.dioxide + 
##     pH + sulphates + alcohol
## 
##                        Df Deviance    AIC
## <none>                      535.47 2539.0
## - residual.sugar        1   536.33 2539.1
## - free.sulfur.dioxide   1   536.34 2539.1
## - citric.acid           1   536.50 2539.4
## - total.sulfur.dioxide  1   544.02 2557.3
## - pH                    1   545.46 2560.7
## - chlorides             1   545.86 2561.6
## - sulphates             1   560.93 2596.5
## - volatile.acidity      1   562.71 2600.5
## - alcohol               1   642.06 2769.4
## Start:  AIC=2567.19
## .outcome ~ fixed.acidity + volatile.acidity + citric.acid + residual.sugar + 
##     chlorides + free.sulfur.dioxide + total.sulfur.dioxide + 
##     density + pH + sulphates + alcohol
## 
##                        Df Deviance    AIC
## - citric.acid           1   546.35 2565.8
## - residual.sugar        1   546.48 2566.1
## - density               1   546.72 2566.6
## - fixed.acidity         1   546.76 2566.7
## <none>                      546.11 2567.2
## - pH                    1   547.18 2567.7
## - free.sulfur.dioxide   1   548.11 2569.9
## - chlorides             1   550.70 2575.9
## - total.sulfur.dioxide  1   552.51 2580.1
## - volatile.acidity      1   567.64 2614.7
## - sulphates             1   569.33 2618.5
## - alcohol               1   580.94 2644.3
## 
## Step:  AIC=2565.76
## .outcome ~ fixed.acidity + volatile.acidity + residual.sugar + 
##     chlorides + free.sulfur.dioxide + total.sulfur.dioxide + 
##     density + pH + sulphates + alcohol
## 
##                        Df Deviance    AIC
## - residual.sugar        1   546.68 2564.5
## - fixed.acidity         1   546.81 2564.8
## - density               1   546.95 2565.2
## <none>                      546.35 2565.8
## - pH                    1   547.39 2566.2
## - free.sulfur.dioxide   1   548.64 2569.1
## - chlorides             1   551.82 2576.5
## - total.sulfur.dioxide  1   553.98 2581.5
## - sulphates             1   569.47 2616.8
## - volatile.acidity      1   572.64 2623.9
## - alcohol               1   581.07 2642.6
## 
## Step:  AIC=2564.53
## .outcome ~ fixed.acidity + volatile.acidity + chlorides + free.sulfur.dioxide + 
##     total.sulfur.dioxide + density + pH + sulphates + alcohol
## 
##                        Df Deviance    AIC
## - fixed.acidity         1   546.91 2563.1
## - density               1   546.97 2563.2
## <none>                      546.68 2564.5
## - pH                    1   548.34 2566.4
## - free.sulfur.dioxide   1   549.26 2568.6
## - chlorides             1   552.18 2575.3
## - total.sulfur.dioxide  1   554.18 2579.9
## - sulphates             1   569.60 2615.1
## - volatile.acidity      1   573.34 2623.4
## - alcohol               1   598.73 2678.9
## 
## Step:  AIC=2563.06
## .outcome ~ volatile.acidity + chlorides + free.sulfur.dioxide + 
##     total.sulfur.dioxide + density + pH + sulphates + alcohol
## 
##                        Df Deviance    AIC
## - density               1   546.98 2561.2
## <none>                      546.91 2563.1
## - free.sulfur.dioxide   1   549.51 2567.1
## - pH                    1   552.91 2575.0
## - chlorides             1   553.02 2575.3
## - total.sulfur.dioxide  1   555.25 2580.4
## - sulphates             1   569.63 2613.1
## - volatile.acidity      1   574.86 2624.8
## - alcohol               1   626.42 2734.7
## 
## Step:  AIC=2561.23
## .outcome ~ volatile.acidity + chlorides + free.sulfur.dioxide + 
##     total.sulfur.dioxide + pH + sulphates + alcohol
## 
##                        Df Deviance    AIC
## <none>                      546.98 2561.2
## - free.sulfur.dioxide   1   549.61 2565.4
## - pH                    1   553.03 2573.3
## - chlorides             1   553.06 2573.4
## - total.sulfur.dioxide  1   555.28 2578.5
## - sulphates             1   569.92 2611.8
## - volatile.acidity      1   575.12 2623.4
## - alcohol               1   647.06 2774.2
## Start:  AIC=2516.28
## .outcome ~ fixed.acidity + volatile.acidity + citric.acid + residual.sugar + 
##     chlorides + free.sulfur.dioxide + total.sulfur.dioxide + 
##     density + pH + sulphates + alcohol
## 
##                        Df Deviance    AIC
## - density               1   524.89 2514.5
## - residual.sugar        1   524.90 2514.5
## - fixed.acidity         1   524.93 2514.6
## - citric.acid           1   525.17 2515.2
## <none>                      524.80 2516.3
## - pH                    1   525.89 2516.9
## - free.sulfur.dioxide   1   527.08 2519.8
## - chlorides             1   531.16 2529.7
## - total.sulfur.dioxide  1   532.44 2532.8
## - sulphates             1   544.34 2561.0
## - volatile.acidity      1   554.81 2585.4
## - alcohol               1   562.10 2602.1
## 
## Step:  AIC=2514.52
## .outcome ~ fixed.acidity + volatile.acidity + citric.acid + residual.sugar + 
##     chlorides + free.sulfur.dioxide + total.sulfur.dioxide + 
##     pH + sulphates + alcohol
## 
##                        Df Deviance    AIC
## - residual.sugar        1   524.92 2512.6
## - fixed.acidity         1   524.93 2512.6
## - citric.acid           1   525.27 2513.4
## <none>                      524.89 2514.5
## - pH                    1   527.23 2518.2
## - free.sulfur.dioxide   1   527.26 2518.3
## - chlorides             1   531.53 2528.6
## - total.sulfur.dioxide  1   532.71 2531.4
## - sulphates             1   544.98 2560.6
## - volatile.acidity      1   556.09 2586.4
## - alcohol               1   620.45 2726.4
## 
## Step:  AIC=2512.59
## .outcome ~ fixed.acidity + volatile.acidity + citric.acid + chlorides + 
##     free.sulfur.dioxide + total.sulfur.dioxide + pH + sulphates + 
##     alcohol
## 
##                        Df Deviance    AIC
## - fixed.acidity         1   524.97 2510.7
## - citric.acid           1   525.28 2511.5
## <none>                      524.92 2512.6
## - pH                    1   527.25 2516.2
## - free.sulfur.dioxide   1   527.35 2516.5
## - chlorides             1   531.53 2526.6
## - total.sulfur.dioxide  1   532.72 2529.4
## - sulphates             1   545.03 2558.7
## - volatile.acidity      1   556.14 2584.5
## - alcohol               1   622.10 2727.8
## 
## Step:  AIC=2510.7
## .outcome ~ volatile.acidity + citric.acid + chlorides + free.sulfur.dioxide + 
##     total.sulfur.dioxide + pH + sulphates + alcohol
## 
##                        Df Deviance    AIC
## - citric.acid           1   525.30 2509.5
## <none>                      524.97 2510.7
## - free.sulfur.dioxide   1   527.47 2514.8
## - pH                    1   528.76 2517.9
## - chlorides             1   532.39 2526.7
## - total.sulfur.dioxide  1   533.69 2529.8
## - sulphates             1   545.32 2557.3
## - volatile.acidity      1   557.78 2586.2
## - alcohol               1   622.92 2727.5
## 
## Step:  AIC=2509.52
## .outcome ~ volatile.acidity + chlorides + free.sulfur.dioxide + 
##     total.sulfur.dioxide + pH + sulphates + alcohol
## 
##                        Df Deviance    AIC
## <none>                      525.30 2509.5
## - free.sulfur.dioxide   1   528.19 2514.5
## - pH                    1   528.86 2516.1
## - chlorides             1   533.66 2527.7
## - total.sulfur.dioxide  1   534.89 2530.7
## - sulphates             1   545.37 2555.5
## - volatile.acidity      1   565.36 2601.5
## - alcohol               1   625.08 2729.9
## Start:  AIC=2517.3
## .outcome ~ fixed.acidity + volatile.acidity + citric.acid + residual.sugar + 
##     chlorides + free.sulfur.dioxide + total.sulfur.dioxide + 
##     density + pH + sulphates + alcohol
## 
##                        Df Deviance    AIC
## - fixed.acidity         1   525.22 2515.3
## - density               1   525.25 2515.4
## - residual.sugar        1   525.43 2515.8
## - citric.acid           1   525.68 2516.4
## <none>                      525.21 2517.3
## - free.sulfur.dioxide   1   526.18 2517.7
## - pH                    1   527.89 2521.8
## - total.sulfur.dioxide  1   532.50 2532.9
## - chlorides             1   536.19 2541.7
## - volatile.acidity      1   550.12 2574.6
## - sulphates             1   551.46 2577.7
## - alcohol               1   560.40 2598.2
## 
## Step:  AIC=2515.31
## .outcome ~ volatile.acidity + citric.acid + residual.sugar + 
##     chlorides + free.sulfur.dioxide + total.sulfur.dioxide + 
##     density + pH + sulphates + alcohol
## 
##                        Df Deviance    AIC
## - density               1   525.26 2513.4
## - residual.sugar        1   525.45 2513.9
## - citric.acid           1   525.71 2514.5
## <none>                      525.22 2515.3
## - free.sulfur.dioxide   1   526.21 2515.7
## - pH                    1   530.89 2527.1
## - total.sulfur.dioxide  1   533.03 2532.2
## - chlorides             1   536.95 2541.6
## - volatile.acidity      1   550.18 2572.7
## - sulphates             1   551.87 2576.6
## - alcohol               1   574.28 2627.5
## 
## Step:  AIC=2513.42
## .outcome ~ volatile.acidity + citric.acid + residual.sugar + 
##     chlorides + free.sulfur.dioxide + total.sulfur.dioxide + 
##     pH + sulphates + alcohol
## 
##                        Df Deviance    AIC
## - residual.sugar        1   525.45 2511.9
## - citric.acid           1   526.08 2513.4
## <none>                      525.26 2513.4
## - free.sulfur.dioxide   1   526.26 2513.8
## - pH                    1   530.99 2525.3
## - total.sulfur.dioxide  1   533.08 2530.3
## - chlorides             1   537.10 2539.9
## - volatile.acidity      1   552.53 2576.2
## - sulphates             1   552.80 2576.8
## - alcohol               1   615.49 2714.2
## 
## Step:  AIC=2511.87
## .outcome ~ volatile.acidity + citric.acid + chlorides + free.sulfur.dioxide + 
##     total.sulfur.dioxide + pH + sulphates + alcohol
## 
##                        Df Deviance    AIC
## - citric.acid           1   526.18 2511.6
## <none>                      525.45 2511.9
## - free.sulfur.dioxide   1   526.55 2512.5
## - pH                    1   531.22 2523.8
## - total.sulfur.dioxide  1   533.14 2528.5
## - chlorides             1   537.18 2538.1
## - volatile.acidity      1   552.57 2574.2
## - sulphates             1   552.91 2575.0
## - alcohol               1   616.79 2714.9
## 
## Step:  AIC=2511.64
## .outcome ~ volatile.acidity + chlorides + free.sulfur.dioxide + 
##     total.sulfur.dioxide + pH + sulphates + alcohol
## 
##                        Df Deviance    AIC
## <none>                      526.18 2511.6
## - free.sulfur.dioxide   1   527.60 2513.1
## - pH                    1   531.39 2522.2
## - total.sulfur.dioxide  1   534.85 2530.6
## - chlorides             1   539.00 2540.4
## - sulphates             1   552.97 2573.2
## - volatile.acidity      1   555.90 2579.9
## - alcohol               1   617.53 2714.4
## Start:  AIC=3164.28
## .outcome ~ fixed.acidity + volatile.acidity + citric.acid + residual.sugar + 
##     chlorides + free.sulfur.dioxide + total.sulfur.dioxide + 
##     density + pH + sulphates + alcohol
## 
##                        Df Deviance    AIC
## - density               1   666.70 3163.0
## - fixed.acidity         1   666.80 3163.2
## - residual.sugar        1   666.91 3163.5
## - citric.acid           1   667.06 3163.8
## <none>                      666.41 3164.3
## - free.sulfur.dioxide   1   668.10 3166.3
## - pH                    1   668.37 3167.0
## - chlorides             1   674.80 3182.3
## - total.sulfur.dioxide  1   674.84 3182.4
## - sulphates             1   693.38 3225.7
## - volatile.acidity      1   700.03 3241.0
## - alcohol               1   712.08 3268.3
## 
## Step:  AIC=3162.96
## .outcome ~ fixed.acidity + volatile.acidity + citric.acid + residual.sugar + 
##     chlorides + free.sulfur.dioxide + total.sulfur.dioxide + 
##     pH + sulphates + alcohol
## 
##                        Df Deviance    AIC
## - fixed.acidity         1   666.81 3161.2
## - residual.sugar        1   666.93 3161.5
## - citric.acid           1   667.35 3162.5
## <none>                      666.70 3163.0
## - free.sulfur.dioxide   1   668.53 3165.3
## - pH                    1   671.02 3171.3
## - total.sulfur.dioxide  1   675.43 3181.8
## - chlorides             1   675.46 3181.8
## - sulphates             1   693.98 3225.1
## - volatile.acidity      1   701.70 3242.8
## - alcohol               1   786.37 3424.9
## 
## Step:  AIC=3161.22
## .outcome ~ volatile.acidity + citric.acid + residual.sugar + 
##     chlorides + free.sulfur.dioxide + total.sulfur.dioxide + 
##     pH + sulphates + alcohol
## 
##                        Df Deviance    AIC
## - residual.sugar        1   667.06 3159.8
## - citric.acid           1   667.37 3160.6
## <none>                      666.81 3161.2
## - free.sulfur.dioxide   1   668.71 3163.8
## - pH                    1   673.87 3176.1
## - chlorides             1   676.75 3182.9
## - total.sulfur.dioxide  1   676.84 3183.1
## - sulphates             1   694.48 3224.2
## - volatile.acidity      1   703.04 3243.8
## - alcohol               1   787.44 3425.1
## 
## Step:  AIC=3159.84
## .outcome ~ volatile.acidity + citric.acid + chlorides + free.sulfur.dioxide + 
##     total.sulfur.dioxide + pH + sulphates + alcohol
## 
##                        Df Deviance    AIC
## - citric.acid           1   667.54 3159.0
## <none>                      667.06 3159.8
## - free.sulfur.dioxide   1   669.13 3162.8
## - pH                    1   674.20 3174.9
## - total.sulfur.dioxide  1   676.89 3181.2
## - chlorides             1   676.89 3181.2
## - sulphates             1   694.51 3222.3
## - volatile.acidity      1   703.04 3241.8
## - alcohol               1   789.73 3427.8
## 
## Step:  AIC=3158.98
## .outcome ~ volatile.acidity + chlorides + free.sulfur.dioxide + 
##     total.sulfur.dioxide + pH + sulphates + alcohol
## 
##                        Df Deviance    AIC
## <none>                      667.54 3159.0
## - free.sulfur.dioxide   1   669.93 3162.7
## - pH                    1   674.61 3173.8
## - total.sulfur.dioxide  1   678.32 3182.6
## - chlorides             1   678.35 3182.7
## - sulphates             1   694.60 3220.5
## - volatile.acidity      1   709.85 3255.3
## - alcohol               1   792.02 3430.4
#model_stepwise

Explore which variables are highly correlated:

# Pairwise correlation matrix over every column of df (quality included).
correlation_df <- cor(df)

#correlation_df

# Original note: fixed acidity to ph and sulphates

# Reshape the square matrix to long form: one row per (Var1, Var2) pair with
# the correlation in `value`, which is the layout geom_tile() needs.
correlation_df_melt <- melt(correlation_df)

gz <- ggplot(
  correlation_df_melt,
  mapping = aes(
    x = Var1,
    y = Var2,
    fill = value,
    # ggplotly(tooltip = "text") displays only the `text` aesthetic, so it
    # must be mapped here; otherwise the hover labels carry no information.
    text = paste0(Var1, " / ", Var2, ": ", round(value, 2))
  )
) +
  geom_tile() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    text = element_text(size = 8)
  ) +
  labs(title = "Heat map for correlation", x = "", y = "") +
  scale_fill_distiller(palette = "RdPu")

# Interactive version of the heat map; hovering a tile shows the variable
# pair and its rounded correlation.
ggplotly(gz, tooltip = "text")
# Reduced predictor set: drop five columns from df before refitting.
minimized_lm_df <- df %>%
  select(
    -c(free.sulfur.dioxide, citric.acid, density, volatile.acidity, pH)
  )

# Regress quality on the columns that remain.
minimized_lm <- lm(quality ~ ., data = minimized_lm_df)

summary(minimized_lm)
## 
## Call:
## lm(formula = quality ~ ., data = minimized_lm_df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.71086 -0.37485 -0.06023  0.48038  2.08242 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           1.5204529  0.2113680   7.193 9.69e-13 ***
## fixed.acidity         0.0485196  0.0101132   4.798 1.76e-06 ***
## residual.sugar        0.0057865  0.0124562   0.465    0.642    
## chlorides            -2.3443435  0.4030377  -5.817 7.24e-09 ***
## total.sulfur.dioxide -0.0023418  0.0005467  -4.283 1.95e-05 ***
## sulphates             1.1850119  0.1116567  10.613  < 2e-16 ***
## alcohol               0.3100041  0.0171438  18.083  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6742 on 1592 degrees of freedom
## Multiple R-squared:  0.3057, Adjusted R-squared:  0.3031 
## F-statistic: 116.8 on 6 and 1592 DF,  p-value: < 2.2e-16

Lasso

# Flag columns involved in pairwise correlations above the 0.5 cutoff.
# Reuses the correlation matrix computed earlier as `correlation_df`
# (same value as cor(df)). `verbose` prints each comparison and `names`
# returns column names instead of indices.
findCorrelation(
  correlation_df,
  cutoff = 0.5,
  names = TRUE,
  verbose = TRUE
)
## Compare row 3  and column  1 with corr  0.672 
##   Means:  0.293 vs 0.19 so flagging column 3 
## Compare row 1  and column  8 with corr  0.668 
##   Means:  0.245 vs 0.172 so flagging column 1 
## Compare row 7  and column  6 with corr  0.668 
##   Means:  0.174 vs 0.169 so flagging column 7 
## All correlations <= 0.5
## [1] "citric.acid"          "fixed.acidity"        "total.sulfur.dioxide"
# Lower-triangle plot of the correlation matrix computed earlier.
ggcorrplot(correlation_df, type = "lower", hc.order = TRUE)

# Drop citric.acid and fixed.acidity, two of the columns flagged by
# findCorrelation(), and keep everything else.
df_less_acid <- df %>%
  select(-c(citric.acid, fixed.acidity))

# Refit the linear model of quality on the remaining columns.
lm_less_acid <- lm(quality ~ ., data = df_less_acid)
summary(lm_less_acid)
## 
## Call:
## lm(formula = quality ~ ., data = df_less_acid)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.66740 -0.37223 -0.04556  0.46264  2.03092 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          12.0575013 12.0089257   1.004   0.3155    
## volatile.acidity     -1.0127925  0.1009279 -10.035  < 2e-16 ***
## residual.sugar        0.0115046  0.0134606   0.855   0.3929    
## chlorides            -2.0490938  0.3992190  -5.133 3.21e-07 ***
## free.sulfur.dioxide   0.0048741  0.0021385   2.279   0.0228 *  
## total.sulfur.dioxide -0.0035687  0.0006939  -5.143 3.03e-07 ***
## density              -7.5668358 11.8648390  -0.638   0.5237    
## pH                   -0.4919908  0.1210931  -4.063 5.08e-05 ***
## sulphates             0.9024434  0.1129657   7.989 2.60e-15 ***
## alcohol               0.2810022  0.0201992  13.912  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.648 on 1589 degrees of freedom
## Multiple R-squared:  0.3598, Adjusted R-squared:  0.3562 
## F-statistic: 99.22 on 9 and 1589 DF,  p-value: < 2.2e-16

Lasso